Skip to contentMethod: EmbeddedAudioMetadataImporter.Pair(IRI, String)
1: /*
2: * *********************************************************************************************************************
3: *
4: * blueMarine II: Semantic Media Centre
5: * http://tidalwave.it/projects/bluemarine2
6: *
7: * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
8: *
9: * *********************************************************************************************************************
10: *
11: * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
12: * the License. You may obtain a copy of the License at
13: *
14: * http://www.apache.org/licenses/LICENSE-2.0
15: *
16: * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
17: * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
18: * specific language governing permissions and limitations under the License.
19: *
20: * *********************************************************************************************************************
21: *
22: * git clone https://bitbucket.org/tidalwave/bluemarine2-src
23: * git clone https://github.com/tidalwave-it/bluemarine2-src
24: *
25: * *********************************************************************************************************************
26: */
27: package it.tidalwave.bluemarine2.metadata.impl.audio.embedded;
28:
29: import javax.annotation.Nonnull;
30: import javax.annotation.concurrent.Immutable;
31: import javax.inject.Inject;
32: import java.time.Duration;
33: import java.time.Instant;
34: import java.util.List;
35: import java.util.Optional;
36: import java.util.stream.Stream;
37: import java.io.IOException;
38: import java.nio.file.Files;
39: import java.nio.file.Path;
40: import org.eclipse.rdf4j.model.IRI;
41: import org.eclipse.rdf4j.model.Model;
42: import org.eclipse.rdf4j.model.Value;
43: import org.eclipse.rdf4j.model.vocabulary.DC;
44: import org.eclipse.rdf4j.model.vocabulary.FOAF;
45: import org.eclipse.rdf4j.model.vocabulary.RDF;
46: import org.eclipse.rdf4j.model.vocabulary.RDFS;
47: import it.tidalwave.util.ConcurrentHashMapWithOptionals;
48: import it.tidalwave.util.Id;
49: import it.tidalwave.util.TimeProvider;
50: import it.tidalwave.util.annotation.VisibleForTesting;
51: import it.tidalwave.messagebus.annotation.ListensTo;
52: import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
53: import it.tidalwave.bluemarine2.util.ModelBuilder;
54: import it.tidalwave.bluemarine2.model.MediaItem;
55: import it.tidalwave.bluemarine2.model.MediaItem.Metadata;
56: import it.tidalwave.bluemarine2.model.spi.PathAwareEntity;
57: import it.tidalwave.bluemarine2.model.vocabulary.*;
58: import it.tidalwave.bluemarine2.mediascanner.impl.MediaItemImportRequest;
59: import it.tidalwave.bluemarine2.mediascanner.impl.ProgressHandler;
60: import it.tidalwave.bluemarine2.mediascanner.impl.StatementManager;
61: import lombok.Getter;
62: import lombok.RequiredArgsConstructor;
63: import lombok.ToString;
64: import lombok.extern.slf4j.Slf4j;
65: import static java.util.Collections.emptyList;
66: import static java.util.stream.Collectors.*;
67: import static it.tidalwave.bluemarine2.util.Formatters.*;
68: import static it.tidalwave.bluemarine2.util.RdfUtilities.*;
69: import static it.tidalwave.bluemarine2.model.MediaItem.Metadata.*;
70:
71: /***********************************************************************************************************************
72: *
73: * This class generates RDF triples out of the {@link Metadata} embedded in an audio file.
74: *
75: * <pre>
76: * mo:AudioFile
77: * IRI computed from the fingerprint of the contents
78: * bm:importedFrom http://bluemarine.tidalwave.it/source#embedded
79: * rdfs:label the display name
80: * dc:title the title
81: * mo:encodes points to the signal
82: * bm:latestIndexingTime the latest import time
83: * bm:path the path of the file
84: * bm:fileSize the file size
85: * foaf:sha1 the fingerprint of the file
86: *
87: * mo:DigitalSignal
88: * IRI computed from the fingerprint of related file
89: * bm:importedFrom http://bluemarine.tidalwave.it/source#embedded
90: * mo:bitsPerSample the bits per sample
91: * mo:duration the duration
92: * mo:sample_rate the sample rate
93: * mo:published_as points to the Track
94: * MISSING mo:channels
95: * MISSING mo:time
96: * MISSING mo:trmid
97: *
98: * mo:Track
99: * IRI computed from the fingerprint of related file
100: * bm:importedFrom http://bluemarine.tidalwave.it/source#embedded
101: * rdfs:label the display name
102: * dc:title the title
103: * mo:track_number the track number in the record
104: * bm:discCount the number of disks in a collection
105: * bm:discNumber the index of the disk in a collection
106: * bm:iTunesCddb1 the CDDB1 attribute encoded by iTunes plus the track index
107: * foaf:maker points to the MusicArtists
108: *
109: * mo:Record
110: * IRI computed from the fingerprint of the name
111: * bm:importedFrom http://bluemarine.tidalwave.it/source#embedded
112: * rdfs:label the display name (ALBUM from audiofile metadata, or the name of the folder)
113: * dc:title the title (see above)
114: * mo:mediaType CD
115: * mo:track points to the Tracks
116: * bm:iTunesCddb1 the CDDB1 attribute encoded by iTunes
117: * foaf:maker points to the MusicArtists (union of the makers of Tracks)
118: * MISSING dc:date
119: * MISSING dc:language
120: * MISSING mo:release TODO points to the Label (EMI, etc...)
121: *
122: * mo:MusicArtist
123: * IRI computed from the fingerprint of the name
124: * bm:importedFrom http://bluemarine.tidalwave.it/source#embedded
125: * rdfs:label the display name
126: * foaf:name the name
127: * (in case of a group also the predicates below)
128: * dbtune:artist_type 2, which means a group
129: * purl:collaborates_with the MusicArtists in the group
130: * </pre>
131: *
132: * @author Fabrizio Giudici
133: *
134: **********************************************************************************************************************/
135: @SimpleMessageSubscriber @Slf4j
136: public class EmbeddedAudioMetadataImporter
137: {
138: /*******************************************************************************************************************
139: *
140: *
141: *
142: ******************************************************************************************************************/
143: @Immutable @RequiredArgsConstructor @Getter @ToString
144: static class Pair
145: {
146: @Nonnull
147: private final IRI iri;
148:
149: @Nonnull
150: private final String name;
151: }
152:
153: @Inject
154: private StatementManager statementManager;
155:
156: @Inject
157: private TimeProvider timeProvider;
158:
159: @Inject
160: private ProgressHandler progress;
161:
162: // Set would suffice, but there's no ConcurrentSet
163: private final ConcurrentHashMapWithOptionals<IRI, Optional<String>> seenArtistUris =
164: new ConcurrentHashMapWithOptionals<>();
165:
166: private final ConcurrentHashMapWithOptionals<IRI, Boolean> seenRecordUris = new ConcurrentHashMapWithOptionals<>();
167:
168: /*******************************************************************************************************************
169: *
170: *
171: *
172: ******************************************************************************************************************/
173: private void reset()
174: {
175: // FIXME: should load existing URIs from the Persistence
176: seenArtistUris.clear();
177: seenRecordUris.clear();
178: }
179:
180: /*******************************************************************************************************************
181: *
182: *
183: ******************************************************************************************************************/
184: @VisibleForTesting void onMediaItemImportRequest (@ListensTo final MediaItemImportRequest request)
185: {
186: request.getSha1().ifPresent(sha1 ->
187: {
188: try
189: {
190: log.info("onMediaItemImportRequest({})", request);
191: statementManager.requestAdd(importMediaItem(request.getMediaItem(), sha1));
192: }
193: finally
194: {
195: progress.incrementImportedMediaItems();
196: }
197: });
198: }
199:
200: /*******************************************************************************************************************
201: *
202: * Processes a {@link MediaItem}.
203: *
204: * @param mediaItem the item
205: * @param sha2 the fingerprint of the file
206: * @return the model
207: *
208: ******************************************************************************************************************/
209: @Nonnull
210: private Model importMediaItem (@Nonnull final MediaItem mediaItem, @Nonnull final byte[] sha1)
211: {
212: log.debug("importMediaItem({})", mediaItem);
213:
214: final Metadata metadata = mediaItem.getMetadata();
215:
216: final Optional<String> trackTitle = metadata.get(TITLE);
217: final Optional<String> makerName = metadata.get(ARTIST);
218: final PathAwareEntity parent = mediaItem.getParent().get();
219: final String recordTitle = metadata.get(ALBUM).orElse(parent.getPath().toFile().getName());
220: final Optional<Integer> diskCount = emptyIfOne(metadata.get(DISK_COUNT));
221: final Optional<Integer> diskNumber = diskCount.flatMap(dc -> metadata.get(DISK_NUMBER));
222: final Id uniqueId = uniqueTrackId(metadata, toBase64String(sha1));
223: final IRI audioFileIri = BMMO.audioFileIriFor(toBase64String(sha1));
224: final IRI signalIri = BMMO.signalIriFor(uniqueId);
225: final IRI trackIri = BMMO.trackIriFor(uniqueId);
226: final IRI recordIri = recordIriOf(metadata, recordTitle);
227: final Optional<IRI> newRecordIri = seenRecordUris.putIfAbsentAndGetNewKey(recordIri, true);
228:
229: final List<IRI> makerUris = makerName.map(name -> List.of(artistIriOf(name))).orElse(emptyList());
230: final List<Pair> artists = makerName.map(name -> Stream.of(name.split("[;]")).map(String::trim)).orElse(Stream.empty())
231: .map(name -> new Pair(artistIriOf(name), name))
232: .collect(toList());
233:
234: final List<Pair> newArtists = artists.stream().filter(
235: p -> seenArtistUris.putIfAbsentAndGetNewKey(p.getIri(), Optional.empty()).isPresent())
236: .collect(toList());
237: final List<IRI> newArtistIris = newArtists.stream().map(Pair::getIri).collect(toList());
238: final List<Value> newArtistLiterals = newArtists.stream().map(p -> literalFor(p.getName())).collect(toList());
239:
240: final Optional<IRI> newGroupIri = (artists.size() <= 1) ? Optional.empty()
241: : seenArtistUris.putIfAbsentAndGetNewKey(makerUris.get(0), Optional.empty()); // FIXME: only first one?
242:
243: return new ModelBuilder()
244: .with( audioFileIri, RDF.TYPE, MO.C_AUDIO_FILE)
245: .with( audioFileIri, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
246: .with( audioFileIri, FOAF.SHA1, literalFor(toHexString(sha1)))
247: .with( audioFileIri, MO.P_ENCODES, signalIri)
248: .with( audioFileIri, BMMO.P_PATH, literalFor(mediaItem.getRelativePath()))
249: .with( audioFileIri, BMMO.P_LATEST_INDEXING_TIME, literalFor(getLastModifiedTime(mediaItem.getPath())))
250: .withOptional(audioFileIri, BMMO.P_FILE_SIZE, literalForLong(metadata.get(FILE_SIZE)))
251:
252: .with( signalIri, RDF.TYPE, MO.C_DIGITAL_SIGNAL)
253: .with( signalIri, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
254: .with( signalIri, MO.P_PUBLISHED_AS, trackIri)
255: .withOptional(signalIri, MO.P_SAMPLE_RATE, literalForInt(metadata.get(SAMPLE_RATE)))
256: .withOptional(signalIri, MO.P_BITS_PER_SAMPLE, literalForInt(metadata.get(BIT_RATE)))
257: .withOptional(signalIri, MO.P_DURATION, literalForFloat(metadata.get(DURATION)
258: .map(Duration::toMillis)
259: .map(l -> (float)l)))
260: .with( trackIri, RDF.TYPE, MO.C_TRACK)
261: .with( trackIri, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
262: .withOptional(trackIri, BMMO.P_ITUNES_CDDB1, literalFor(metadata.get(ITUNES_COMMENT)
263: .map(ITunesComment::getTrackId)))
264: .withOptional(trackIri, MO.P_TRACK_NUMBER, literalForInt(metadata.get(TRACK_NUMBER)))
265: .withOptional(trackIri, RDFS.LABEL, literalFor(trackTitle))
266: .withOptional(trackIri, DC.TITLE, literalFor(trackTitle))
267: .with( trackIri, FOAF.MAKER, makerUris.stream())
268:
269: .withOptional(newRecordIri, RDF.TYPE, MO.C_RECORD)
270: .withOptional(newRecordIri, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
271: .withOptional(newRecordIri, MO.P_MEDIA_TYPE, MO.C_CD)
272: .withOptional(newRecordIri, RDFS.LABEL, literalFor(recordTitle))
273: .withOptional(newRecordIri, DC.TITLE, literalFor(recordTitle))
274: .withOptional(newRecordIri, MO.P_TRACK_COUNT, literalForInt(metadata.get(CDDB)
275: .map(Cddb::getTrackCount)))
276: .withOptional(newRecordIri, BMMO.P_DISK_NUMBER, literalForInt(diskNumber))
277: .withOptional(newRecordIri, BMMO.P_DISK_COUNT, literalForInt(diskCount))
278: .withOptional(newRecordIri, BMMO.P_ITUNES_CDDB1, literalFor(metadata.get(ITUNES_COMMENT)
279: .map(ITunesComment::getCddb1)))
280: .with( recordIri, MO.P_TRACK, trackIri)
281: .with( recordIri, FOAF.MAKER, makerUris.stream())
282:
283: .with( newArtistIris, RDF.TYPE, MO.C_MUSIC_ARTIST)
284: .with( newArtistIris, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
285: .with( newArtistIris, RDFS.LABEL, newArtistLiterals)
286: .with( newArtistIris, FOAF.NAME, newArtistLiterals)
287:
288: .withOptional(newGroupIri, RDF.TYPE, MO.C_MUSIC_ARTIST)
289: .withOptional(newGroupIri, BMMO.P_IMPORTED_FROM, BMMO.O_SOURCE_EMBEDDED)
290: .withOptional(newGroupIri, RDFS.LABEL, literalFor(makerName))
291: .withOptional(newGroupIri, FOAF.NAME, literalFor(makerName))
292: .withOptional(newGroupIri, DbTune.P_ARTIST_TYPE, literalFor((short)2))
293: .withOptional(newGroupIri, Purl.P_COLLABORATES_WITH, artists.stream().map(Pair::getIri))
294: .toModel();
295: }
296:
297: /*******************************************************************************************************************
298: *
299: *
300: ******************************************************************************************************************/
301: @Nonnull
302: private Instant getLastModifiedTime (@Nonnull final Path path)
303: {
304: try
305: {
306: return Files.getLastModifiedTime(path).toInstant();
307: }
308: catch (IOException e) // should never happen
309: {
310: log.warn("Cannot get last modified time for {}: assuming now", path);
311: return timeProvider.currentInstant();
312: }
313: }
314:
315: /*******************************************************************************************************************
316: *
317: *
318: ******************************************************************************************************************/
319: @Nonnull
320: public static IRI recordIriOf (@Nonnull final Metadata metadata, @Nonnull final String recordTitle)
321: {
322: final Optional<Cddb> cddb = metadata.get(CDDB);
323: return BMMO.recordIriFor(cddb.map(value -> createSha1IdNew(value.getToc()))
324: .orElseGet(() -> createSha1IdNew("RECORD:" + recordTitle)));
325: }
326:
327: /*******************************************************************************************************************
328: *
329: *
330: ******************************************************************************************************************/
331: @Nonnull
332: private Id uniqueTrackId (@Nonnull final Metadata metadata, @Nonnull final String default_)
333: {
334: final Optional<Cddb> cddb = metadata.get(CDDB);
335: final Optional<Integer> trackNumber = metadata.get(TRACK_NUMBER);
336:
337: return (cddb.isPresent() && trackNumber.isPresent())
338: ? createSha1IdNew(cddb.get().getToc() + "/" + trackNumber.get())
339: : Id.of(default_);
340: }
341:
342: /*******************************************************************************************************************
343: *
344: *
345: ******************************************************************************************************************/
346: @Nonnull
347: private IRI artistIriOf (@Nonnull final String name)
348: {
349: return BMMO.artistIriFor(createSha1IdNew("ARTIST:" + name));
350: }
351:
352: /*******************************************************************************************************************
353: *
354: *
355: *
356: ******************************************************************************************************************/
357: @Nonnull
358: private static Optional<Integer> emptyIfOne (@Nonnull final Optional<Integer> number)
359: {
360: return number.flatMap(n -> (n == 1) ? Optional.empty() : Optional.of(n));
361: }
362: }